library(tidyverse)
library(sf)
library(tigris)
library(leaflet)
Create bar or line charts showing monthly total kBTUs of residential and commercial electricity and gas consumption for the Bay Area (meaning the sum of all ZIP codes in the 9 Bay Area counties) from 2017 to the latest available month
ca_counties <- counties("06", cb = T, progress_bar = F)
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
ca_counties %>%
filter(NAME %in% bay_county_names)
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
bay_zipcodes <- bay_zips$ZCTA5CE10
#read data from 2017 to 1029
years <- c(2017,2018,2019)
quarters <- 1:4
pge_Gas <- NULL
pge_Elec <- NULL
for(year in years) {
for(quarter in quarters){
filename_Gas <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_GasUsageByZip.csv"
)
filename_Elec <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_ElectricUsageByZip.csv"
)
aux_Gas <- read_csv(filename_Gas)
pge_Gas <- rbind(pge_Gas, aux_Gas)
aux_Elec <- read_csv(filename_Elec)
pge_Elec <- rbind(pge_Elec, aux_Elec)
}
}
#read data for 2020
year <- 2020
quarters <- 1:2
for(quarter in quarters) {
filename_Gas <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_GasUsageByZip.csv"
)
filename_Elec <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_ElectricUsageByZip.csv"
)
aux_Gas <- read_csv(filename_Gas)
pge_Gas <- rbind(pge_Gas, aux_Gas)
aux_Elec <- read_csv(filename_Elec)
pge_Elec <- rbind(pge_Elec, aux_Elec)
}
#remove auxiliary variables
rm(aux_Gas)
rm(aux_Elec)
#abstract bay area electric consumption
pge_Elec$ZIPCODE <- as.character(pge_Elec$ZIPCODE)
pge_Elec_final <-
pge_Elec %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in%
bay_zipcodes
) %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
TOTALKWH =
sum(
TOTALKWH,
na.rm = T
)
) %>%
mutate(
TOTALKBTU =
TOTALKWH*3.41214
) %>%
select(
!c(TOTALKWH)
)
#abstract bay area gas consumption
#during debugging found that the ZIPCODE in pge_Gas datafram is integer-typed
#therefore, need to transform data type
pge_Gas$ZIPCODE <- as.character(pge_Gas$ZIPCODE)
pge_Gas_final <-
pge_Gas %>%
filter(
CUSTOMERCLASS %in%
c(
"Gas- Residential",
"Gas- Commercial"
)
) %>%
filter(
ZIPCODE %in%
bay_zipcodes
) %>%
select(
!c(COMBINED, AVERAGETHM, TOTALCUSTOMERS)
) %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
TOTALTHM =
sum(
TOTALTHM,
na.rm = T
)
) %>%
mutate(
TOTALKBTU =
TOTALTHM*99.9761
) %>%
select(
!c(TOTALTHM)
)
# concatenate gas and electric usage data
pge <- rbind(pge_Gas_final,pge_Elec_final)
pge$MONTH <- month.abb[pge$MONTH]
pge_final <-
pge %>%
mutate(
Time =
paste0(
MONTH,
YEAR
)
)
#create a vector of characters containning ordered dates (MonYear)
time_levels = c()
time_levels_less = c()
for(i in seq(1, 83, by=2)){
time_levels <- c(time_levels,pge_final$Time[i])
}
for(i in seq(1, 83, by=8)){
time_levels_less <- c(time_levels_less,pge_final$Time[i])
}
pge_chart <-
pge_final %>%
ggplot() +
geom_bar(
aes(
x = Time %>% factor(
levels=time_levels,
ordered = TRUE),
y = TOTALKBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Time",
y = "kBTU",
title = "PG&E Bay Area Monthly Gas & Electricity Usage, Jan 2017 to Jun 2020",
fill = "Gas or Electricity Type"
) +
scale_x_discrete(breaks = time_levels_less)
PG&E Bay Area Monthly total gas & electricity Usage (in kBTU) from Jan 2017 to Jun 2020
As we can see from the bar chart, the total monthly electricity consumptions for commerical customers during the shelter-in-place period (March 2020 to June 2020) are smaller than those of the same months but in previous years when covid-19 didn’t lead to the closure of many commercial sectors.
Create at least one map that highlights which neighborhoods experienced the greatest change in electricity consumption before and after the pandemic began, and comment on your results.
#"DoI" means Duration of Interest, which is March to Jun (shelter-in-place)
#abstract data for the duratio of interest in 2020
pge_Elec_Bay_2020_DoI <-
pge_Elec %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in% bay_zipcodes
) %>%
filter(
YEAR == 2020
) %>%
filter(
MONTH %in% c(3,4,5,6)
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
)
pge_Elec_Bay_2019_DoI <-
pge_Elec %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in% bay_zipcodes
) %>%
filter(
YEAR == 2019
) %>%
filter(
MONTH %in% c(3,4,5,6)
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
)
#calculate the average total consuption of 2017, 2018, 2019 for each zipcode in bay area
pge_Elec_Bay_Past_DoI <-
pge_Elec %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in% bay_zipcodes
) %>%
filter(
MONTH %in% c("3","4","5","6")
) %>%
filter(
YEAR %in% c(2017,2018,2019)
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
) %>%
mutate(
AVERAGE_3YEAR =
TOTALKWH/3.0
) %>%
select(
!c(TOTALKWH)
)
elec_change_19_20 <- merge(
pge_Elec_Bay_2020_DoI, pge_Elec_Bay_2019_DoI, by = c("ZIPCODE")
) %>%
mutate(
CHANGE = TOTALKWH.x-TOTALKWH.y
) %>%
select(
!c(TOTALKWH.x,TOTALKWH.y)
) %>%
left_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
elec_change_past_20 <- merge(
pge_Elec_Bay_2020_DoI, pge_Elec_Bay_Past_DoI, by = c("ZIPCODE")
) %>%
mutate(
CHANGE = TOTALKWH - AVERAGE_3YEAR
) %>%
select(
!c(TOTALKWH,AVERAGE_3YEAR)
) %>%
left_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
res_pal_1 <- colorNumeric(
palette = c("white", "red"),
domain =
elec_change_19_20$CHANGE
)
map_1 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_19_20,
fillColor = ~res_pal_1(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_19_20,
pal = res_pal_1,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, 2019 vs 2020"
)
map_1
res_pal_2 <- colorNumeric(
palette = c("white", "red"),
domain =
elec_change_past_20$CHANGE
)
map_2 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_past_20,
fillColor = ~res_pal_2(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_past_20,
pal = res_pal_2,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, the average of 2017, 2018 2019 vs 2020"
)
map_2
res_pal_3 <- colorNumeric(
palette = "Blues",
domain =
-elec_change_19_20$CHANGE
)
map_3 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_19_20,
fillColor = ~res_pal_3(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_19_20,
pal = res_pal_3,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, 2019 vs 2020"
)
map_3
res_pal_4 <- colorNumeric(
palette = "Blues",
domain =
-elec_change_past_20$CHANGE
)
map_4 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_past_20,
fillColor = ~res_pal_4(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_past_20,
pal = res_pal_4,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, the average of 2017, 2018 2019 vs 2020"
)
map_4